'''
Created on 2017-4-14

@author: 24253
'''

from bs4 import BeautifulSoup as BS
import os,xlwt,xlrd,traceback
from urllib import request
from xlrd import open_workbook
from xlutils.copy import copy



    
def get_content(url):
    """Scrape one Lianjia listings page and append each listing to the Excel file.

    url: a second-hand-housing listings page, e.g. .../ershoufang/pg2/.
    Side effects: prints each field and calls write_to_excel() per listing.
    """
    _html = request.urlopen(url).read()
    _soup = BS(_html, "lxml")

    _content = _soup.find_all("div", attrs={"class": "info clear"})
    # BUG FIX: the original printed _content[0]/[2]/[4] unconditionally
    # (IndexError on pages with fewer than 5 listings) and iterated
    # range(len(_content) - 1), silently dropping the last listing on
    # every page. Iterate over every listing instead.
    for item in _content:
        _title = item.find(attrs={"class": "title"}).text
        print(_title)
        _address = item.find(attrs={"class": "address"}).text
        print(_address)
        _flood = item.find(attrs={"class": "flood"}).text
        print(_flood)
        _followInfo = item.find(attrs={"class": "followInfo"}).text
        print(_followInfo)
        # BUG FIX: some listings carry no "tag" block; the original crashed
        # with AttributeError (.text on None). Fall back to an empty string.
        _tag_node = item.find(attrs={"class": "tag"})
        _tag = _tag_node.text if _tag_node is not None else ""
        print(_tag)
        _price = item.find(attrs={"class": "priceInfo"})
        _totalprice = _price.find(attrs={"class": "totalPrice"}).text
        _unitprice = _price.find(attrs={"class": "unitPrice"}).text
        print(_totalprice + "----" + _unitprice)

        write_to_excel([_title, _address, _flood, _tag, _totalprice, _unitprice])
    
def write_to_excel(content, path=None):
    """Append one row of cell values to an .xls workbook, creating it if absent.

    content: sequence of cell values, written left-to-right on the next free row.
    path: workbook file; defaults to the original hard-coded d:<sep>shujuxinxi.xls
          (parameter added for reuse; default preserves existing behavior).
    """
    if path is None:
        path = "d:" + os.sep + "shujuxinxi.xls"
    try:
        rb = xlrd.open_workbook(path)
    # BUG FIX: the original bare `except:` swallowed every exception
    # (including KeyboardInterrupt). Catch only "file missing/unreadable".
    except (OSError, xlrd.XLRDError):
        print("创建Excel")
        filename = xlwt.Workbook()
        filename.add_sheet("sheet1")
        filename.save(path)
        rb = xlrd.open_workbook(path)

    # Next free row = current row count of the existing sheet.
    start_row = rb.sheet_by_index(0).nrows
    wb = copy(rb)
    sheet = wb.get_sheet(0)
    # BUG FIX: the original wrote at (start_row + 1, i + 1), leaving a blank
    # row between every append and a permanently blank first column.
    for col, value in enumerate(content):
        sheet.write(start_row, col, value)
    wb.save(path)

if __name__ == "__main__":
    # Scrape the first two listing pages.
    # BUG FIX: Lianjia paginates as .../ershoufang/pg<N>/, not .../pg/<N>;
    # the original URL served the same first page on every iteration.
    for page in range(1, 3):
        start_url = "http://sz.lianjia.com/ershoufang/pg{}/".format(page)
        get_content(start_url)
    
    
        
    
